********************************************************************************
* Date: October 24, 2017
* Claudia
* Code for linking schools to nearest wind monitors, calculating downwind value for
* schools, getting annual averages (by hour) of the downwind, windtreat and circtreat
* variables. 


********************************************************************************

clear all 
set more off
set maxvar 32767


local main "C:\Users\Claudia Persico\Dropbox\Research on Florida Wind Patters"

*********************
scalar prefdist = 5 	// how many miles between school and wind monitor?
********************* 
scalar mitoroad = 1 // how many miles between school and road?
********************* 

forval yy = 2003/2012 {

********************************************************************************
// 1. Create a data set with uniqueID, school lon and lat
********************************************************************************

foreach sample in mjrrds {

cd "`main'\roadsetup\samples\remake\"

use "schooltomajorrds_`sample'_5closestA", clear // EDITED BY JAH 5/14

merge 1:1 ncessch using "`main'\School wind pollution\School wind road\NCES_2010" // EDITED BY JAH 5/14
tab _merge
drop if _merge==2

keep if mi_to_nid1<=mitoroad // mitoroad defined on line 19
keep uniqueID ncessch schnam latcod loncod
rename (schnam latcod loncod) (schoolname latitude longitude)
gen id = 1
tempfile school
save `school', replace

********************************************************************************
// 2.  Link schools to nearest wind monitors used in the final first stage data set
// winddir_dropmonitor dropped all monitors with >5% zero values 
********************************************************************************
use "`main'\School wind pollution\School wind road\winddir_dropmonitor_`yy'.dta", clear 
levelsof station, loc(stationID) clean
scalar nwind = wordcount("`stationID'")
tempfile winddir
save `winddir', replace
keep station lat lon
drop if mi(lat) & mi(lon)
duplicates drop station, force
drop if station==""
gen id = 1
rename (lat lon) (lat_ lon_)
reshape wide lat_ lon_, i(id) j(station) string
merge 1:m id using `school', nogen
drop id
order uniqueID schoolname latitude longitude

// calculate distance between each wind station and each school
foreach stid of local stationID {
	geodist latitude longitude lat_`stid' lon_`stid', gen(dist_`stid') miles sphere
	}
	
// sort distances in ascending order
rowsort dist_*, gen(dist1-dist`=nwind') 
drop dist2-dist`=nwind'
// identify closest wind stations within `prefdist' miles of school
forval vv = 1/1 { 
	replace dist`vv' = . if dist`vv'>prefdist // exclude wind sites that are further than "prefdist" miles, prefdist specified at the very top 
	gen windsite`vv' = ""
	gen windlat`vv' = .
	gen windlon`vv' = .
	foreach ww of local stationID {
		replace windsite`vv' = "`ww'" if abs(dist`vv' - dist_`ww')<0.0001
		replace windlat`vv' = lat_`ww' if abs(dist`vv' - dist_`ww')<0.0001
		replace windlon`vv' = lon_`ww' if abs(dist`vv' - dist_`ww')<0.0001
	}
}
dropmiss *, force // drops distance variables that have all missing observations
drop dist_* lat_* lon_*
drop if mi(windsite1)
order uniqueID ncessch schoolname dist* windsite*
tempfile nearest
save `nearest', replace
save "`main'\roadsetup\samples\remake\school_wind_5nearestA_`yy'", replace

********************************************************************************
// 3. Merge school-station to wind direction data
// First expand schools to have a full year 8am-3pm observations, then loop to merge
********************************************************************************
gen date = date("`yy'/01/01", "YMD") if _n==1
replace date = date("`yy'/12/31", "YMD") if _n==_N
format date %td
carryforward date, replace

xtset uniqueID date
tsfill, full  // for each school, generate dates from jan 1 to dec 31
bysort uniqueID: carryforward ncessch schoolname *1 *lat* *lon*, replace
gsort uniqueID -date // take care of the last school on the list
by uniqueID: carryforward ncessch schoolname *1 *lat* *lon*, replace
sort uniqueID date
expand 2 // duplicates the dataset
bysort uniqueID date: gen id = _n
gen hh = hms(8,00,00) if id == 1
replace hh = hms(15,00,00) if id==2
gen double datetime = date*24*60*60*1000 + hh
format datetime %tcNN/DD/CCYY_HH:MM
egen panelvar = group(uniqueID date)
xtset panelvar datetime, delta(1 hour)
tsfill	
ds panelvar, not
bysort panelvar: carryforward `r(varlist)', replace
drop date hh id panelvar
gen station = windsite1
tempfile schfill
save `schfill', replace
// merge with wind direction data
use "`main'\School wind pollution\School wind road\winddir_dropmonitor_`yy'.dta", clear 
merge 1:m station datetime using `schfill', nogen
drop if mi(uniqueID)
order datetime uniqueID ncessch schoolname dist1 windsite1 circmean windvalue im_circ* im_wind* *lat* *lon*
sort uniqueID datetime
rename (latitude longitude) (schlat schlon)

save "`main'\roadsetup\samples\remake\schwind`=prefdist'mi_schroad`=mitoroad'mi_`sample'_5nearestA_`yy'.dta", replace  

}
}



